#%%

import os
import sys
import re
import importlib.util
import traceback
import math
import random
from pathlib import Path
import builtins
from typing import Dict, List, Optional, Tuple, Callable, Literal, Any
from collections import Counter, deque
import time
import textwrap
import inspect

# --- Required Libraries ---
try:
    import numpy as np
    import pandas as pd # Added for saving population history
    import matplotlib.pyplot as plt
    import matplotlib.tri as tri
    import openai
    from dotenv import load_dotenv
    import scipy.optimize
    from huggingface_hub import InferenceClient
except ImportError as e:
    print(f"Error: Missing required libraries ({e}).")
    print("Please install them using: pip install numpy pandas matplotlib openai python-dotenv scipy huggingface_hub")
    sys.exit(1)

# --- Constants ---
# The two legal moves of the Prisoner's Dilemma.
COOPERATE = "C"
DEFECT = "D"
# Maps a (move_of_player_1, move_of_player_2) pair to (payoff_1, payoff_2).
PAYOFFS = {
    (COOPERATE, COOPERATE): (3, 3), (COOPERATE, DEFECT):   (0, 5),
    (DEFECT, COOPERATE):   (5, 0), (DEFECT, DEFECT):     (1, 1),
}
# Move used by run_ipd_game when an agent record carries no 'fallback' entry.
SYSTEM_DEFAULT_FALLBACK_MOVE = DEFECT

# --- Simulation Parameters ---
POPULATION_SIZE = 30
NUM_MORAN_STEPS = 50
ROUNDS_PER_IPD_GAME = 10 # Rounds for fitness calculation in Moran step
ROUNDS_FOR_PAYOFF_MATRIX = 50 # Rounds for initial payoff matrix calculation
# NOTE(review): presumably the root folders for plots and for generated strategy
# files; their consumers are not all visible in this part of the file.
OUTPUT_VIS_DIR = Path("scale_tournaments")
STRATEGIES_BASE_DIR = Path("scale_generated_strategies")

# Models to compete - API, Model ID, Label, and optionally Provider for HF
MODEL_CONFIGS = [
    # Qwen Batch
    #{"api": "huggingface", "model": "Qwen/Qwen2.5-1.5B-Instruct", "label": "Qwen-1.5B", "provider": "nebius"},
    #{"api": "huggingface", "model": "Qwen/Qwen2.5-32B-Instruct", "label": "Qwen-32B", "provider": "nebius"},
    #{"api": "huggingface", "model": "Qwen/Qwen2.5-72B-Instruct", "label": "Qwen-72B", "provider": "novita"}
    # OpenAI batch
    {"api": "openai", "model": "gpt-4o-mini", "label": "gpt-4o-mini-2024-07-18"},
    {"api": "openai", "model": "gpt-4o", "label": "gpt-4o-2024-08-06"},
    {"api": "openai", "model": "o4-mini-2025-04-16", "label": "o4-mini"},
]

# Display labels of the competing models, in the same order as MODEL_CONFIGS.
MODEL_LABELS = [config["label"] for config in MODEL_CONFIGS]

# Text substituted for {objective_prompt_segment} in the strategy prompts when
# there is no earlier tournament to report on.
INITIAL_OBJECTIVE_PROMPT = (
    "This is the first evolutionary tournament run. Therefore, you have no information "
    "about opponent strategies or performance from any *previous tournaments*. "
    "Design your initial strategy based on the tournament structure described."
)

# --- Global API Key Loading & Configuration ---
# Provider name -> API key. A value of None means that provider cannot be used.
API_KEYS: Dict[str, Optional[str]] = {}
try:
    # `__file__` is undefined when this file is executed as an interactive cell
    # (note the `#%%` marker at the top of the file). Without this fallback the
    # resulting NameError would land in the outer handler and wipe both keys,
    # even when they are present in the OS environment.
    try:
        script_location = Path(__file__).resolve().parent
    except NameError:
        script_location = Path.cwd()

    # Search for a .env file next to the script, then up to two levels above it.
    paths_to_check = [script_location, script_location.parent, script_location.parent.parent]
    dotenv_path_found = None
    for p_check in paths_to_check:
        temp_path = p_check / ".env"
        if temp_path.exists():
            dotenv_path_found = temp_path
            break
    if dotenv_path_found:
        load_dotenv(dotenv_path=dotenv_path_found)
        print(f"Loaded environment variables from: {dotenv_path_found}")
    else:
        print("Warning: .env file not found. Attempting to load from OS environment.")

    API_KEYS["openai"] = os.getenv("OPENAI_API_KEY")
    API_KEYS["huggingface"] = os.getenv("HF_API_KEY")

    if API_KEYS["openai"]:
        if 'openai' in sys.modules and hasattr(openai, 'api_key'):
            openai.api_key = API_KEYS["openai"]
            print("OpenAI API Key configured.")
        else:
            print("Warning: OpenAI library loaded but API key configuration failed.")
            API_KEYS["openai"] = None
    else:
        print("Warning: OPENAI_API_KEY not found.")
    if not API_KEYS["huggingface"]:
        print("Warning: HF_API_KEY not found. Hugging Face models will not be usable.")
except Exception as e:
    # Best effort: any unexpected failure leaves both providers disabled.
    print(f"Error during API key loading: {e}")
    API_KEYS["openai"] = None
    API_KEYS["huggingface"] = None

# --- LLM Interface Class ---
class LLMInterface:
    """Send single-turn prompts to an OpenAI or Hugging Face chat model.

    The constructor validates the API key for the requested backend and builds
    the client; `generate_via_api` dispatches to the backend-specific call.
    """

    def __init__(self, api_type: str, model_name: str, global_api_keys: Dict[str, Optional[str]], provider: Optional[str] = None):
        """Create the backend client.

        Args:
            api_type: "openai" or "huggingface" (case-insensitive).
            model_name: Model identifier passed to the backend.
            global_api_keys: Mapping with optional "openai" / "huggingface" keys.
            provider: Optional Hugging Face inference provider name.

        Raises:
            ValueError: The required API key or model name is missing.
            ImportError: The backend library is not loaded.
            RuntimeError: The backend client could not be constructed.
            NotImplementedError: `api_type` is not a supported backend.
        """
        self.api_type = api_type.lower()
        self.model_name = model_name
        self.provider = provider
        self.client = None
        if self.api_type == "openai":
            if not global_api_keys.get("openai"):
                raise ValueError("OpenAI API key not found/configured.")
            if 'openai' not in sys.modules or not hasattr(openai, 'chat'):
                raise ImportError("OpenAI library not correctly loaded.")
            try:
                self.client = openai.OpenAI(api_key=global_api_keys["openai"]).chat.completions
                print(f"LLMInterface initialized for OpenAI (model: {self.model_name}).")
            except Exception as e:
                raise RuntimeError(f"Failed to initialize OpenAI client: {e}") from e
        elif self.api_type == "huggingface":
            if InferenceClient is None:
                raise ImportError("huggingface_hub library required.")
            hf_key = global_api_keys.get("huggingface")
            if not hf_key:
                raise ValueError("HF_API_KEY not found.")
            if not self.model_name:
                raise ValueError("model_name must be provided for Hugging Face.")
            try:
                client_args = {"model": self.model_name, "token": hf_key}
                if self.provider:
                    client_args["provider"] = self.provider
                self.client = InferenceClient(**client_args)
                print(f"LLMInterface initialized for Hugging Face (model: {self.model_name}, provider: {self.provider or 'Default'}).")
            except Exception as e:
                raise RuntimeError(f"Error initializing HF InferenceClient for {self.model_name}: {e}") from e
        else:
            raise NotImplementedError(f"API type '{api_type}' not supported.")

    def _call_openai_api(self, prompt: str, max_tokens: int, temperature: float) -> Optional[str]:
        """Return the stripped completion text, or None when nothing usable came back.

        `openai.BadRequestError` is logged and re-raised; every other exception
        is logged and turned into a None result.
        """
        if not self.client:
            print(f"Error: OpenAI client not initialized ({self.model_name}).")
            return None
        try:
            params = {
                "model": self.model_name,
                "messages": [{"role": "user", "content": prompt}],
                "temperature": temperature,
                "n": 1,
                "stop": None,
            }
            if "o4-mini" in self.model_name:
                # o4-mini models get `max_completion_tokens` and a fixed temperature of 1.0.
                params["max_completion_tokens"] = max_tokens
                params["temperature"] = 1.0
            else:
                params["max_tokens"] = max_tokens
            response = self.client.create(**params)
            if response and response.choices and response.choices[0].message:
                content = getattr(response.choices[0].message, 'content', None)
                if content:
                    return content.strip()
                finish_reason = getattr(response.choices[0], 'finish_reason', None)
                if finish_reason == 'content_filter':
                    print(f"CRITICAL WARNING: Content for {self.model_name} filtered by OpenAI.")
            return None
        except openai.BadRequestError as e:
            print(f"OpenAI BadRequestError ({self.model_name}): {e}")
            raise
        except Exception as e:
            print(f"Error OpenAI API call ({self.model_name}): {e}\n{traceback.format_exc()}")
            return None

    def _call_huggingface_api(self, prompt: str, max_tokens: int, temperature: float) -> Optional[str]:
        """Return the stripped completion text, or None on any failure."""
        if not self.client:
            print(f"Error: Hugging Face client not initialized ({self.model_name}).")
            return None
        try:
            # A temperature of exactly 0 is replaced by a small positive value.
            effective_temp = 0.01 if temperature == 0 else temperature
            if not hasattr(self.client, 'chat_completion'):
                print(f"Error: HF client for {self.model_name} no 'chat_completion' method.")
                return None
            response = self.client.chat_completion(
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                temperature=effective_temp,
            )
            # Fallback for back-ends that answer with a bare string. This must be
            # tested before touching `.choices`, which a str does not have.
            if isinstance(response, str):
                return response.strip()
            choices = getattr(response, 'choices', None)
            if choices and choices[0].message:
                content = getattr(choices[0].message, 'content', None)
                return content.strip() if content else None
            print(f"Warning: Hugging Face API ({self.model_name}) unexpected response structure.")
            return None
        except Exception as e:
            print(f"Error Hugging Face API call ({self.model_name}): {e}\n{traceback.format_exc()}")
            return None

    def generate_via_api(self, prompt: str, max_tokens: int, temperature: float) -> Optional[str]:
        """Dispatch `prompt` to the configured backend and return its text (or None)."""
        if self.api_type == "openai":
            return self._call_openai_api(prompt, max_tokens, temperature)
        if self.api_type == "huggingface":
            return self._call_huggingface_api(prompt, max_tokens, temperature)
        print(f"Error: API method for '{self.api_type}' not implemented.")
        return None

# --- Strategy Generator Class (Rewritten Prompts) ---
class StrategyGenerator:
    """Ask an LLM for an IPD strategy in two passes: prose description, then code.

    Pass 1 (`generate_textual_strategy`) returns a plain-text description plus
    the fallback move the model declared; pass 2 (`generate_strategy_code`)
    asks the model to turn that description into a Python function.
    """

    STRATEGY_TEXTUAL_PROMPT_TEMPLATE = textwrap.dedent("""
    You are an AI agent, Player {player_id_display}, designing your own strategy for an evolutionary tournament of Iterated Prisoner's Dilemma (IPD).

    **Tournament Structure:**
    1.  **Population:** You are one of {POPULATION_SIZE} agents.
    2.  **Evolutionary Steps:** The tournament proceeds for {NUM_MORAN_STEPS} steps using a Moran process.
    3.  **Fitness Calculation (each Moran step):**
        * You will play an IPD game of {ROUNDS_PER_IPD_GAME} rounds against EVERY OTHER agent in the current population.
        * Your total payoff from all these IPD games in a Moran step determines your 'fitness' for that step.
    4.  **Evolution (each Moran step):**
        * One agent is chosen to reproduce. Agents with higher fitness have a proportionally higher chance of being chosen.
        * One agent is chosen uniformly at random to be removed from the population.
        * The removed agent is replaced by an identical copy (offspring) of the agent chosen for reproduction.
    5.  **Initial Evaluation (before Moran process):**
        * Your strategy will be evaluated by playing IPD games of {ROUNDS_FOR_PAYOFF_MATRIX} rounds against each of the initial strategies developed for this tournament, *including an identical copy of your own strategy*.
        * This means your function will be called to play against itself (i.e., `opponent_program_code` will be your own strategy's code) to determine self-play performance, which is a component of the overall payoff matrix.
        * The results form this payoff matrix, used for theoretical analysis. Your survival in the tournament, however, depends on your performance in the subsequent Moran process.

    **Your Goal:**
    Design a Python strategy function `def {strategy_name}(my_history: list, opp_history: list, opponent_program_code: str) -> str:`
    This function should aim to maximize your own total payoff in the IPD games, thereby increasing your fitness, chances of reproduction, and long-term survival in the population.

    **Your Task (Part 1: Strategy Description):**
    1.  **Describe the core logic** for your Python strategy function `{strategy_name}`. Explain:
        * How your function will decide to Cooperate ('C') or Defect ('D') in each round of an IPD game.
        * How it will use `my_history` (list of your moves: ['C', 'D', ...]) in the current IPD game so far.
        * How it will use `opp_history` (list of opponent's moves) in the current IPD game so far.
        * How (if at all) your function will analyze the `opponent_program_code` (a string containing the Python source code of your current opponent's strategy, which could be your own code during self-play evaluation).
        * Consider edge cases (e.g., the first few moves of an IPD game, empty histories).
    2.  **Clarity:** Your description must be clear and detailed enough for direct translation into Python code in Part 2.

    **Context for Strategy Design (Player {player_id_display}):**
    {objective_prompt_segment}
    (You are designing an initial strategy for this tournament and have no prior information about opponent strategies.)

    **Output Format for THIS PART (Strategy Description):**
    * **TEXT ONLY:** Provide a clear, concise, and actionable textual description of your strategy.
    * **NO CODE:** Do NOT write any Python code or use code formatting in this strategy description part.
    * Focus on the *logic and reasoning* behind the strategy.
    * **Default Fallback Move:** After your strategy description, on a COMPLETELY NEW LINE, specify the default move if your Python function (which you'll write next) were to encounter an error or fail to return 'C' or 'D'. Use the EXACT format:
        `DEFAULT FALLBACK MOVE: C`  (or `DEFAULT FALLBACK MOVE: D`)

    Describe your strategy for `{strategy_name}` and then your default fallback move now:
    """)

    CODE_GENERATION_FROM_STRATEGY_PROMPT_TEMPLATE = textwrap.dedent("""
    You are a LLM agent: Player {player_id_display}. You must implement your Iterated Prisoner's Dilemma (IPD) strategy as a Python function for the evolutionary tournament.

    **Tournament Context Recap:**
    * Your function will be part of an agent in an evolutionary IPD tournament.
    * Population Size: {POPULATION_SIZE} agents. Moran Steps: {NUM_MORAN_STEPS}.
    * **Fitness Calculation IPD Games:** Your function will be called for {ROUNDS_PER_IPD_GAME} rounds per opponent in each Moran step.
    * **Initial Payoff Matrix IPD Games:** Your function will also be called for {ROUNDS_FOR_PAYOFF_MATRIX} rounds per opponent for an initial payoff matrix. This includes playing against an identical copy of your own strategy (where `opponent_program_code` will be your own code).

    **Your Strategy Description (from Part 1):**
    --- BEGIN STRATEGY DESCRIPTION ---
    {textual_strategy_description}
    --- END STRATEGY DESCRIPTION ---

    **Function Signature YOU MUST IMPLEMENT:**
    ```python
    def {strategy_name}(my_history: list, opp_history: list, opponent_program_code: str) -> str:
        # my_history: Your moves ('C' or 'D') in the *current IPD game* so far.
        # opp_history: Opponent's moves ('C' or 'D') in the *current IPD game* so far.
        # opponent_program_code: The source code of your opponent's strategy for *this current IPD game*.
        # RETURN: "C" for Cooperate or "D" for Defect.
    ```

    **Implementation Rules:**
    1.  **Adhere to YOUR Strategy:** Your code must implement the strategy you described in Part 1.
    2.  **Correct Signature:** The function must match the specified signature *exactly*.
    3.  **Return Value:** Must return only the string "C" or "D".
    4.  **Allowed Modules:** You can use the following pre-imported modules directly (DO NOT use `import` statements in your code):
        - `random`: For random number generation (e.g., `random.choice(['C', 'D'])`).
        - `math`: For mathematical functions (e.g., `math.sqrt`).
        - `re`: For regular expressions (if needed for analyzing `opponent_program_code`).
        - `collections.Counter`, `collections.deque`: For data structures.
        Assume `COOPERATE = "C"` and `DEFECT = "D"` are available globally if needed, but it's safest to return the literal strings "C" or "D".
    5.  **No Other Imports:** Your code MUST NOT contain any `import` statements (e.g., `import numpy`).

    **Output Format Requirements:**
    * **PYTHON CODE ONLY:** Your response MUST contain ONLY the complete Python function definition for `{strategy_name}`.
    * **NO EXTRA TEXT:** Start *immediately* with `def {strategy_name}(...)`. Do NOT include explanations, comments outside the function body, ```python fences, or any other text before or after the function definition.

    Generate the Python code for your function `{strategy_name}` now, based *strictly* on YOUR strategy description and these rules:
    """)

    # Matches the "DEFAULT FALLBACK MOVE: X" marker through the end of its line.
    # Markdown emphasis characters and back-ticks are tolerated around the label
    # and the value, so "**DEFAULT FALLBACK MOVE:** C" is recognised as well.
    _FALLBACK_MOVE_RE = re.compile(r"[*_`]*DEFAULT FALLBACK MOVE:[\s*_`]*([CD]).*", re.IGNORECASE)

    def __init__(self, llm_interface: "LLMInterface"):
        """Store the LLM backend used for both generation passes."""
        self.llm_interface = llm_interface
        print(f"StrategyGenerator initialized with LLMInterface for {llm_interface.api_type}:{llm_interface.model_name}.")

    @staticmethod
    def _extract_fallback_move(raw_response: str) -> Tuple[str, Optional[str]]:
        """Split the fallback-move marker off an LLM response.

        Returns:
            `(text, move)`. When a marker is found, `text` is the response with
            every marker line removed and stripped, and `move` is "C" or "D".
            When no marker is found, the response is returned unchanged with
            `move` set to None.
        """
        match = StrategyGenerator._FALLBACK_MOVE_RE.search(raw_response)
        if match is None:
            return raw_response, None
        remaining_text = StrategyGenerator._FALLBACK_MOVE_RE.sub("", raw_response).strip()
        return remaining_text, match.group(1).upper()

    def _clean_llm_output(self, text_output: Optional[str], is_code: bool = False) -> Optional[str]:
        """Strip `<think>` sections and Markdown fences from an LLM response.

        For code the fence markers are removed and the code is kept; for prose
        whole fenced blocks are replaced by "[Code Block Removed]". Returns
        None when `text_output` is empty or None.
        """
        if not text_output:
            return None
        cleaned_output = re.sub(r'<think>.*?</think>', '', text_output, flags=re.DOTALL | re.IGNORECASE)
        if is_code:
            cleaned_output = re.sub(r"^```(?:python)?\s*", "", cleaned_output, flags=re.MULTILINE)
            cleaned_output = re.sub(r"\s*```\s*$", "", cleaned_output).strip()
        else:
            cleaned_output = re.sub(r"```(?:python)?.*?```", "[Code Block Removed]", cleaned_output, flags=re.DOTALL | re.IGNORECASE)
            cleaned_output = re.sub(r"```", "", cleaned_output)
        return cleaned_output.strip()

    def generate_textual_strategy(
        self, player_id_display: str, strategy_name: str,
        objective_prompt_segment: str,
        max_tokens: int = 3000, temperature: float = 0.7,
    ) -> Tuple[Optional[str], Optional[Literal['C', 'D']]]:
        """Ask the LLM for a prose strategy description and its fallback move.

        Returns:
            `(description, fallback_move)`. `description` is None when the LLM
            returned nothing usable; `fallback_move` is None when the response
            contained no "DEFAULT FALLBACK MOVE:" marker.
        """
        print(f"--- {player_id_display}: Generating TEXTUAL STRATEGY for '{strategy_name}' ---")

        prompt = self.STRATEGY_TEXTUAL_PROMPT_TEMPLATE.format(
            player_id_display=player_id_display, strategy_name=strategy_name,
            POPULATION_SIZE=POPULATION_SIZE,
            NUM_MORAN_STEPS=NUM_MORAN_STEPS,
            ROUNDS_PER_IPD_GAME=ROUNDS_PER_IPD_GAME,
            ROUNDS_FOR_PAYOFF_MATRIX=ROUNDS_FOR_PAYOFF_MATRIX,
            objective_prompt_segment=objective_prompt_segment,
        )
        raw_llm_response = self.llm_interface.generate_via_api(prompt, max_tokens, temperature)
        parsed_default_move = None
        if raw_llm_response:
            raw_llm_response, parsed_default_move = self._extract_fallback_move(raw_llm_response)
            if parsed_default_move:
                print(f"  LLM suggested default fallback: {parsed_default_move}")
            else:
                print(f"  Warning: LLM did not specify 'DEFAULT FALLBACK MOVE:' for '{strategy_name}'.")
        textual_strategy = self._clean_llm_output(raw_llm_response, is_code=False)
        if not textual_strategy:
            print(f"  Error: LLM returned no textual strategy for '{strategy_name}'. Raw: '{raw_llm_response}'")
            return None, parsed_default_move
        print(f"  LLM generated textual strategy for '{strategy_name}'.")
        return textual_strategy, parsed_default_move

    def generate_strategy_code(
        self, player_id_display: str, strategy_name: str,
        objective_prompt_segment: str,
        textual_strategy_description: str,
        max_tokens: int = 3000, temperature: float = 0.2,
    ) -> Optional[str]:
        """Ask the LLM to implement `textual_strategy_description` as Python code.

        Returns the cleaned source text, or None when the LLM returned nothing.
        """
        print(f"--- {player_id_display}: Generating CODE for '{strategy_name}' from textual strategy ---")

        # The code template has no {objective_prompt_segment} placeholder;
        # str.format ignores the surplus keyword argument.
        prompt = self.CODE_GENERATION_FROM_STRATEGY_PROMPT_TEMPLATE.format(
            player_id_display=player_id_display, strategy_name=strategy_name,
            POPULATION_SIZE=POPULATION_SIZE,
            NUM_MORAN_STEPS=NUM_MORAN_STEPS,
            ROUNDS_PER_IPD_GAME=ROUNDS_PER_IPD_GAME,
            ROUNDS_FOR_PAYOFF_MATRIX=ROUNDS_FOR_PAYOFF_MATRIX,
            textual_strategy_description=textual_strategy_description,
            objective_prompt_segment=objective_prompt_segment,
        )
        raw_code = self.llm_interface.generate_via_api(prompt, max_tokens, temperature)
        code = self._clean_llm_output(raw_code, is_code=True)
        if not code:
            print(f"  Error: LLM returned no code for '{strategy_name}'. Raw: '{raw_code}'")
            return None
        if "import " in code:
            print(f"  Info: LLM generated code for '{strategy_name}' containing 'import'. Allowed imports are handled by exec env.")
        print(f"  LLM generated code snippet for '{strategy_name}'.")
        return code

# --- Default Execution Namespace & Compilation ---
ALLOWED_IMPORTS = ['random', 'math', 're', 'collections']
def _custom_import(name: str, globals_dict=None, locals_dict=None, fromlist=(), level=0):
    module_root = name.split('.')[0]
    if module_root in ALLOWED_IMPORTS: return builtins.__import__(name, globals_dict, locals_dict, fromlist, level)
    else: raise ImportError(f"Disallowed import: '{name}'. Only {ALLOWED_IMPORTS} allowed.")

# Global namespace template handed to `exec` when compiling generated strategies.
# Only the names listed under '__builtins__' are reachable as builtins from the
# executed code; any other builtin raises NameError inside a strategy.
DEFAULT_EXEC_NS = dict(
    # typing helpers
    List=List, Dict=Dict, Optional=Optional, Literal=Literal, Any=Any,
    # pre-imported modules and containers
    random=random, re=re, math=math, Counter=Counter, deque=deque,
    # move constants
    COOPERATE=COOPERATE, DEFECT=DEFECT,
)
DEFAULT_EXEC_NS['__builtins__'] = {
    **{
        builtin_name: getattr(builtins, builtin_name)
        for builtin_name in (
            'print', 'len', 'range', 'list', 'dict',
            'str', 'int', 'float', 'bool',
            'True', 'False', 'None',
            'max', 'min', 'sum', 'abs', 'round',
            'any', 'all', 'zip', 'enumerate',
            'sorted', 'reversed', 'set', 'tuple',
        )
    },
    # Imports go through the allow-list check instead of the real __import__.
    '__import__': _custom_import,
}

def compile_strategy_from_string(strat_name: str, code: str, llm_suggested_fallback_move: Optional[Literal['C', 'D']]) -> Optional[Tuple[str, Callable, Optional[Literal['C', 'D']]]]:
    """Execute generated strategy source in the sandbox and return its function.

    Args:
        strat_name: Name the strategy function is expected to have.
        code: Python source text to execute.
        llm_suggested_fallback_move: Passed through unchanged in the result.

    Returns:
        `(function_name, function, llm_suggested_fallback_move)` on success, or
        None when the code is empty, fails to execute, defines no usable
        function, or the function lacks one of the three required parameters.
    """
    print(f"Compiling '{strat_name}' (LLM Fallback: {llm_suggested_fallback_move})...")
    if not code:
        print(f"Error: Code for '{strat_name}' is empty.")
        return None
    try:
        # Fresh copies so one strategy cannot alter the namespace of another.
        sandbox = dict(DEFAULT_EXEC_NS)
        sandbox['__builtins__'] = dict(DEFAULT_EXEC_NS['__builtins__'])
        exec(code, sandbox)

        resolved_name = strat_name
        strategy_fn = sandbox.get(strat_name)
        if not strategy_fn or not callable(strategy_fn):
            # The expected name is missing: accept the code only when it
            # defined exactly one function.
            candidates = {key: obj for key, obj in sandbox.items() if inspect.isfunction(obj)}
            if len(candidates) != 1:
                print(f"  Error: Function '{strat_name}' not found/callable. Found: {list(candidates.keys())}.")
                return None
            resolved_name, strategy_fn = next(iter(candidates.items()))
            print(f"  Warning: Using only defined function '{resolved_name}'.")

        parameters = inspect.signature(strategy_fn).parameters
        for required_name in ('my_history', 'opp_history', 'opponent_program_code'):
            if required_name not in parameters:
                print(f"  Error: Signature for '{resolved_name}' incompatible.")
                return None

        print(f"  Successfully compiled '{resolved_name}'.")
        return resolved_name, strategy_fn, llm_suggested_fallback_move
    except Exception as e:
        print(f"  Error compiling '{strat_name}': {e}\n{traceback.format_exc()}")
        return None

# --- Core Simulation Functions (run_ipd_game, run_moran_process - Unchanged from v3.0.2) ---
def run_ipd_game(agent1_info: Dict[str, Any], agent2_info: Dict[str, Any], num_rounds: int) -> Tuple[int, int]:
    """Play one Iterated Prisoner's Dilemma game and return both total scores.

    Each agent dict may carry 'func' (the strategy callable), 'fallback' (move
    used when the strategy raises or returns something other than "C"/"D"),
    'label' (used in the error message) and 'code' (source text shown to the
    opponent).

    Args:
        agent1_info: Record of the first player.
        agent2_info: Record of the second player.
        num_rounds: Number of rounds to play.

    Returns:
        `(score1, score2)`, or `(0, 0)` when either 'func' is not callable.
    """
    func1, func2 = agent1_info.get('func'), agent2_info.get('func')
    name1, name2 = agent1_info.get('label', 'agent1'), agent2_info.get('label', 'agent2')
    if not callable(func1) or not callable(func2):
        print(f"Error: Agent function not callable for {name1} or {name2}.")
        return 0, 0

    valid_moves = (COOPERATE, DEFECT)
    # A 'fallback' entry may be present but None (no fallback was parsed from
    # the LLM) or otherwise invalid. Normalise it here so that an illegal move
    # never reaches the histories or the payoff lookup.
    fallback1 = agent1_info.get('fallback')
    if fallback1 not in valid_moves:
        fallback1 = SYSTEM_DEFAULT_FALLBACK_MOVE
    fallback2 = agent2_info.get('fallback')
    if fallback2 not in valid_moves:
        fallback2 = SYSTEM_DEFAULT_FALLBACK_MOVE

    code1 = str(agent1_info.get('code', f"# Code for {name1}"))
    code2 = str(agent2_info.get('code', f"# Code for {name2}"))
    hist1, hist2, score1, score2 = [], [], 0, 0
    for _ in range(num_rounds):
        move1, move2 = fallback1, fallback2
        # Strategies receive copies of the histories so they cannot tamper with
        # the game record. A failing strategy is deliberately silent: it simply
        # plays its fallback move for that round.
        try:
            action1 = func1(list(hist1), list(hist2), code2)
            if action1 in valid_moves:
                move1 = action1
        except Exception:
            pass
        try:
            action2 = func2(list(hist2), list(hist1), code1)
            if action2 in valid_moves:
                move2 = action2
        except Exception:
            pass
        hist1.append(move1)
        hist2.append(move2)
        payoff1, payoff2 = PAYOFFS.get((move1, move2), (0, 0))
        score1 += payoff1
        score2 += payoff2
    return score1, score2

def run_moran_process(initial_population_info: List[Dict[str, Any]], strategy_labels: List[str], num_steps: int, pop_size: int, rounds_per_game: int) -> List[Dict[str, float]]:
    """Run a Moran birth-death process over a population of IPD agents.

    In every step each agent plays `run_ipd_game` against every other agent;
    its mean score (floored at 0) is its fitness. One agent is then drawn to
    reproduce with probability proportional to fitness, and a uniformly random
    agent is replaced by a copy of it.

    Args:
        initial_population_info: Agent dicts, each with at least a 'label' key.
        strategy_labels: Labels whose population share is tracked.
        num_steps: Number of Moran steps.
        pop_size: Required population size.
        rounds_per_game: Rounds per IPD game.

    Returns:
        One `{label: fraction}` dict per step, recorded before that step's
        replacement is applied.

    Raises:
        ValueError: Population size mismatch, or an agent without 'label'.
    """
    if len(initial_population_info) != pop_size:
        raise ValueError("Initial population size mismatch.")
    if any('label' not in member for member in initial_population_info):
        raise ValueError("Agents must have 'label'.")

    population = list(initial_population_info)
    population_history = []
    print(f"\n--- Starting Moran Process ({num_steps} steps, N={pop_size}) ---")
    start_time = time.time()
    agent_ids = list(range(pop_size))
    report_interval = max(1, num_steps // 10)

    for step in range(num_steps):
        # Fitness: mean score of each focal agent over games against all others.
        average_fitness = []
        for focal in agent_ids:
            total_score, games = 0, 0
            for rival in agent_ids:
                if rival == focal:
                    continue
                focal_score, _ = run_ipd_game(population[focal], population[rival], rounds_per_game)
                total_score += focal_score
                games += 1
            mean_score = total_score / games if games > 0 else 0
            average_fitness.append(max(0, mean_score))

        # Record the composition before this step's replacement.
        label_counts = Counter(member['label'] for member in population)
        step_fractions = {label: label_counts.get(label, 0) / pop_size for label in strategy_labels}
        population_history.append(step_fractions)
        if step % report_interval == 0 or step == num_steps - 1:
            comp_str = ", ".join([f"{lbl}: {frac:.2f}" for lbl, frac in step_fractions.items()])
            print(f"Step {step}/{num_steps} [{time.time()-start_time:.1f}s] - Composition: {{{comp_str}}}")

        # Birth: fitness-proportional choice, uniform when all fitness is ~0.
        total_fitness = sum(average_fitness)
        if total_fitness <= 1e-9:
            reproducer_idx = random.choice(agent_ids)
        else:
            probabilities = np.array(average_fitness) / total_fitness
            if abs(np.sum(probabilities) - 1.0) > 1e-6:
                probabilities /= np.sum(probabilities)  # Normalize
            try:
                reproducer_idx = np.random.choice(agent_ids, p=probabilities)
            except ValueError:
                reproducer_idx = random.choice(agent_ids)  # Fallback

        # Death: uniformly random agent is overwritten by a copy of the parent.
        removed_idx = random.choice(agent_ids)
        population[removed_idx] = population[reproducer_idx].copy()

    print(f"--- Moran Process Finished ({time.time() - start_time:.1f}s) ---")
    return population_history

# --- Simplex Dynamics Plotting Class (Unchanged from v3.0.2) ---
class SimplexDynamicsPlotter:
    """Draw a three-strategy dynamic on the 2-simplex.

    The class-level attributes define an equilateral triangle and a refined
    triangular mesh shared by all instances. An instance evaluates the supplied
    dynamics function on that mesh to obtain flow direction, flow strength and
    fixed points, which `plot_dynamics_simplex` then renders.
    """

    # Triangle corners in Cartesian coordinates.
    r0, r1, r2 = np.array([0,0]), np.array([1,0]), np.array([0.5, np.sqrt(3)/2.])
    corners = np.array([r0, r1, r2])
    triangle = tri.Triangulation(corners[:,0], corners[:,1])
    refiner = tri.UniformTriRefiner(triangle)
    trimesh = refiner.refine_triangulation(subdiv=5)

    def __init__(self, replicator_func: Callable):
        """Store `replicator_func(x, t)` and precompute fixed points and flow."""
        self.f = replicator_func
        self.calculate_stationary_points()
        self.calc_direction_and_strength()

    def xy2ba(self, x, y):
        """Convert Cartesian (x, y) to barycentric coordinates (NaNs if degenerate)."""
        c = self.corners
        detT = (c[1,1]-c[2,1])*(c[0,0]-c[2,0]) + (c[2,0]-c[1,0])*(c[0,1]-c[2,1])
        if abs(detT) < 1e-12:
            return np.array([np.nan]*3)
        l1 = ((c[1,1]-c[2,1])*(x-c[2,0]) + (c[2,0]-c[1,0])*(y-c[2,1]))/detT
        l2 = ((c[2,1]-c[0,1])*(x-c[2,0]) + (c[0,0]-c[2,0])*(y-c[2,1]))/detT
        return np.array([l1, l2, 1-l1-l2])

    def ba2xy(self, ba):
        """Convert one barycentric vector, or an array of them, to Cartesian."""
        ba = np.array(ba)
        if ba.ndim > 1:
            return self.corners.T.dot(ba.T).T
        return self.corners.T.dot(ba)

    def calculate_stationary_points(self, tol=1e-8, margin=0.005):
        """Search for fixed points by root-finding from interior mesh nodes.

        Mesh nodes with any barycentric coordinate below `margin` are skipped
        as starting points. Distinct roots that lie on the simplex are stored,
        in Cartesian coordinates, in `self.fixpoints`.
        """
        fp_bary = []
        for x, y in zip(self.trimesh.x, self.trimesh.y):
            start_ba = self.xy2ba(x, y)
            if np.any(start_ba < margin) or np.any(np.isnan(start_ba)):
                continue
            try:
                sol = scipy.optimize.root(lambda vec: self.f(vec,0), start_ba, method="hybr", tol=tol)
                if sol.success and math.isclose(np.sum(sol.x),1,abs_tol=1e-3) and np.all((sol.x > -1e-12)&(sol.x < 1+1e-12)):
                    if not any(np.allclose(sol.x, fp, atol=1e-5) for fp in fp_bary):
                        fp_bary.append(sol.x.tolist())
            except Exception:
                # A failed solve for one starting point is skipped. This is
                # `Exception` rather than a bare `except` so that Ctrl-C and
                # SystemExit still interrupt the search.
                continue
        self.fixpoints = self.ba2xy(np.array(fp_bary)) if fp_bary else np.array([])
        print(f"Found {len(fp_bary)} fixed points (barycentric): {fp_bary}")

    def calc_direction_and_strength(self):
        """Evaluate the dynamics on every mesh node.

        Sets `self.pvals` (norm of the barycentric velocity), `self.dir_xy`
        (Cartesian displacement of a small step) and `self.dir_norm_xy` (that
        displacement normalised to unit length, zero where it vanishes).
        """
        bary = np.array([self.xy2ba(x,y) for x,y in zip(self.trimesh.x, self.trimesh.y)])
        dir_ba = np.array([self.f(ba,0) if not np.any(np.isnan(ba)) else [0,0,0] for ba in bary])
        self.pvals = np.linalg.norm(dir_ba, axis=1)
        # Take a small step, clip back into [0, 1] and renormalise onto the simplex.
        next_bary = np.clip(bary + dir_ba * 0.1, 0, 1)
        next_bary_sum = np.sum(next_bary, axis=1, keepdims=True)
        next_bary /= np.where(next_bary_sum == 0, 1, next_bary_sum) # Avoid div by zero
        curr_xy, next_xy = self.ba2xy(bary), self.ba2xy(next_bary)
        self.dir_xy = next_xy - curr_xy
        norms = np.linalg.norm(self.dir_xy, axis=1)
        self.dir_norm_xy = np.divide(self.dir_xy, norms[:,np.newaxis], out=np.zeros_like(self.dir_xy), where=norms[:,np.newaxis]!=0)

    def plot_dynamics_simplex(self, ax, cmap='viridis', typelabels=["S1","S2","S3"], **kwargs):
        """Render flow strength, flow arrows, fixed points and corner labels on `ax`.

        Args:
            ax: Matplotlib axes to draw on.
            cmap: Colormap for the flow-strength contours.
            typelabels: Three corner labels, in the order r0, r1, r2.
            **kwargs: Forwarded to `ax.tricontourf`.
        """
        ax.set_facecolor('white')
        ax.triplot(self.triangle, lw=0.8, c="darkgrey", zorder=1)
        if hasattr(self,'pvals'):
            contour = ax.tricontourf(self.trimesh, self.pvals, alpha=0.6, cmap=cmap, levels=14, zorder=2, **kwargs)
            plt.colorbar(contour, ax=ax, label="Flow Strength", shrink=0.7)
        if hasattr(self,'dir_norm_xy'):
            ax.quiver(self.trimesh.x, self.trimesh.y, self.dir_norm_xy[:,0], self.dir_norm_xy[:,1], angles='xy', pivot='mid', scale=20, width=0.004, headwidth=3.5, color='black', zorder=3)
        if hasattr(self,'fixpoints') and self.fixpoints.size > 0:
            ax.scatter(self.fixpoints[:,0], self.fixpoints[:,1], c="red", s=90, marker='o', edgecolors='black', lw=1.2, zorder=5, label="Fixed Points")
        mgn, fs = 0.05, 13
        ax.text(self.r0[0], self.r0[1]-mgn, typelabels[0], ha='center',va='top',fontsize=fs,weight='bold')
        ax.text(self.r1[0], self.r1[1]-mgn, typelabels[1], ha='center',va='top',fontsize=fs,weight='bold')
        ax.text(self.r2[0], self.r2[1]+mgn*0.5, typelabels[2], ha='center',va='bottom',fontsize=fs,weight='bold')
        ax.axis('equal')
        ax.axis('off')
        ax.set_ylim(ymin=-0.1, ymax=self.r2[1]+0.1)
        ax.set_xlim(xmin=-0.1, xmax=1.1)

# --- Combined Plotting Function (Unchanged from v3.0.2) ---
def plot_dynamics_and_trajectory_simplex(payoff_matrix: np.ndarray, population_history: List[Dict[str, float]], strategy_order: List[str], strategy_labels: List[str], output_dir: Path, plot_trajectory: bool = False):
    """Plot replicator dynamics on a 3-strategy simplex and save it as a PNG.

    The flow field is drawn by ``SimplexDynamicsPlotter`` from the replicator
    equation ``dx_i/dt = x_i * ((A x)_i - x . A x)`` with ``A = payoff_matrix``.
    When requested, the recorded population trajectory is overlaid on top.

    Args:
        payoff_matrix: 3x3 array; any other shape aborts with an error message.
        population_history: One mapping per recorded step from strategy name
            to its count or share. Missing names count as 0.0; every step is
            normalised to sum to 1, and an all-zero step is placed at
            (1/3, 1/3, 1/3).
        strategy_order: The three keys used to read ``population_history``;
            expected to be in the same order as ``strategy_labels``.
        strategy_labels: The three labels drawn at the simplex corners; also
            used (sanitised) to name the output subfolder and file.
        output_dir: Base directory; the image is written to
            ``output_dir/<sanitised labels joined by "_vs_">/``.
        plot_trajectory: Overlay the trajectory when True (default False).

    Returns:
        None. Failures while drawing the dynamics or saving the file are
        printed rather than raised.
    """
    if len(strategy_order) != 3 or len(strategy_labels) != 3 or payoff_matrix.shape != (3, 3):
        print("Error: Simplex plot needs 3 strategies.")
        return

    safe_labels = [re.sub(r'[^\w\-]+', '_', label) for label in strategy_labels]
    run_subfolder = '_vs_'.join(safe_labels)
    specific_out_dir = output_dir / run_subfolder
    specific_out_dir.mkdir(parents=True, exist_ok=True)
    outfile = specific_out_dir / f"Evolutionary_Dynamics_Simplex_{run_subfolder}.png"

    def replicator_dyn(x, t, A):
        """Replicator right-hand side for composition ``x`` (``t`` is unused)."""
        # Work on a float copy so the in-place normalisation below never
        # touches the caller's data and never fails on integer input.
        x = np.clip(np.array(x, dtype=float), 0, 1)
        x_sum = np.sum(x)
        x /= (x_sum if x_sum > 1e-9 else 1)
        ep = A.dot(x)   # expected payoff of each strategy
        ap = x.dot(ep)  # population-average payoff
        return x * (ep - ap)

    fig, ax = plt.subplots(figsize=(10, 9))
    try:
        plotter = None
        try:
            plotter = SimplexDynamicsPlotter(lambda x, t: replicator_dyn(x, t, payoff_matrix))
            plotter.plot_dynamics_simplex(ax, typelabels=strategy_labels)
        except Exception as e:
            print(f"Error in simplex dynamics plot: {e}")
            traceback.print_exc()

        if plot_trajectory and population_history and plotter:  # check plotter exists
            # dtype=float matters: with integer counts the array would be
            # integer-typed, the 1/3 fill value would truncate to 0 and
            # np.divide could not write its float result into `out`.
            traj_ba = np.array(
                [[step_comp.get(name, 0.0) for name in strategy_order] for step_comp in population_history],
                dtype=float,
            )
            traj_ba_sum = np.sum(traj_ba, axis=1, keepdims=True)
            # Normalize, handling cases where sum is zero to avoid NaN/Inf
            traj_ba = np.divide(traj_ba, traj_ba_sum, out=np.full_like(traj_ba, 1/3), where=traj_ba_sum != 0)

            traj_xy = plotter.ba2xy(traj_ba)
            x_coords, y_coords = traj_xy[:, 0], traj_xy[:, 1]
            ax.plot(x_coords, y_coords, c='magenta', lw=2.5, ls='-', marker='.', ms=5, label='Moran Trajectory', zorder=4)
            if len(x_coords) > 0:
                ax.plot(x_coords[0], y_coords[0], 'o', c='lime', ms=11, label='Start', zorder=6, mec='k')
                ax.plot(x_coords[-1], y_coords[-1], 's', c='red', ms=11, label=f'End (Step {len(x_coords)-1})', zorder=6, mec='k')
        elif plot_trajectory and not plotter:
            print("Warning: Cannot plot trajectory because dynamics plotter instance is not available.")

        # Collapse duplicate legend entries (same label drawn more than once).
        handles, labels = ax.get_legend_handles_labels()
        by_label = dict(zip(labels, handles))
        ax.legend(by_label.values(), by_label.keys(), loc='upper left', bbox_to_anchor=(1.02, 1), borderaxespad=0., fontsize=10)

        # Address the figure/axes directly instead of pyplot's "current" state.
        ax.set_title(f"Evolutionary Dynamics & Moran Trajectory (N={POPULATION_SIZE})", fontsize=16)
        fig.tight_layout(rect=[0, 0, 0.85, 0.96])
        try:
            fig.savefig(outfile, dpi=150, bbox_inches='tight')
            print(f"Saved Simplex plot: {outfile}")
        except Exception as e:
            print(f"Error saving plot {outfile}: {e}")
    finally:
        # Always release the figure, including when drawing or saving failed.
        plt.close(fig)


# --- Helper to create valid python identifiers (Unchanged from v3.0.2) ---
def sanitize_for_python_identifier(name: str) -> str:
    """Turn an arbitrary string into a name usable as a Python identifier.

    Steps:
      1. Every character outside ``[A-Za-z0-9_]`` becomes ``_``.
      2. Leading digits are dropped, so the result starts with a letter or
         an underscore.
      3. An empty result becomes ``'_unnamed_strategy'``.
      4. A reserved word (e.g. ``class``, ``None``) gets a trailing ``_``,
         because keywords cannot be used as function names.

    Args:
        name: Raw text, for example a model label.

    Returns:
        A non-empty string that is a valid, non-keyword Python identifier.
    """
    import keyword  # function-scope import: the module is only needed here

    name = re.sub(r'[^a-zA-Z0-9_]', '_', name)
    # After the substitution above only ASCII letters, digits and '_' remain,
    # so this strips exactly the leading digits. No separate "starts with a
    # digit" check is needed afterwards.
    name = re.sub(r'^[^a-zA-Z_]+', '', name)
    if not name:
        return '_unnamed_strategy'
    if keyword.iskeyword(name):
        name += '_'
    return name

# --- Main Execution ---
if __name__ == "__main__":
    # Script entry point. In order: create output folders, validate the
    # configuration, generate and compile one strategy per configured model,
    # measure the pairwise payoff matrix, run the Moran process, then save the
    # population history and the simplex plot.

    # Base folders for plots/data and for generated strategy files.
    OUTPUT_VIS_DIR.mkdir(parents=True, exist_ok=True)
    STRATEGIES_BASE_DIR.mkdir(parents=True, exist_ok=True)

    # MODEL_LABELS is defined outside this section; exactly three labels are
    # required.
    if 'MODEL_LABELS' not in globals() or not MODEL_LABELS or len(MODEL_LABELS) != 3:
        print("Error: MODEL_LABELS must be 3 strings. Exiting."); sys.exit(1)
        
    # Build a filesystem-friendly run name: every run of characters other than
    # word characters and '-' is replaced by '_', and the labels are joined
    # with '_vs_'.
    safe_model_labels_for_path = [re.sub(r'[^\w\-]+', '_', label) for label in MODEL_LABELS]
    tournament_run_name = '_vs_'.join(safe_model_labels_for_path)
    
    # Per-run folder for the textual strategies and generated code.
    current_run_strategies_dir = STRATEGIES_BASE_DIR / tournament_run_name
    current_run_strategies_dir.mkdir(parents=True, exist_ok=True)
    print(f"Strategies for this run saved in: {current_run_strategies_dir}")

    # Per-run folder for the payoff matrix and population history CSVs.
    current_run_output_dir = OUTPUT_VIS_DIR / tournament_run_name
    current_run_output_dir.mkdir(parents=True, exist_ok=True)
    print(f"Outputs (plots, data) for this run will be saved in: {current_run_output_dir}")


    # The simplex has three corners, so exactly three model configs are needed.
    if len(MODEL_CONFIGS) != 3:
        print(f"Error: Script needs 3 MODEL_CONFIGS for simplex. Found {len(MODEL_CONFIGS)}."); sys.exit(1)

    # Report every missing API key before exiting, not just the first one.
    # API_KEYS is defined outside this section and is looked up by config['api'].
    required_keys_present = True
    for config in MODEL_CONFIGS:
        if not API_KEYS.get(config['api']):
            print(f"Error: API Key for '{config['api']}' (for model '{config['model']}') not found."); required_keys_present = False
    if not required_keys_present: print("Set API keys (OPENAI_API_KEY, HF_API_KEY) in .env or environment."); sys.exit(1)

    print("\n--- Generating Initial Strategies ---")
    # Maps model label -> dict with keys 'label', 'func', 'fallback',
    # 'original_name' and 'code' (filled in at the end of each iteration).
    initial_agent_info: Dict[str, Dict[str, Any]] = {} 

    for config in MODEL_CONFIGS:
        model_api, model_id, model_label = config["api"], config["model"], config["label"]
        # 'provider' is optional in a config entry; None when absent.
        model_provider = config.get("provider")
        print(f"\nGenerating strategy for {model_label} (API: {model_api}, Model: {model_id}, Provider: {model_provider or 'Default'})...")
        # The sys.exit(1) calls inside this try raise SystemExit, which is not
        # an Exception subclass, so the `except Exception` below does not
        # intercept them.
        try:
            llm_interface = LLMInterface(model_api, model_id, API_KEYS, model_provider)
            strategy_generator = StrategyGenerator(llm_interface)
            base_name = f"{model_label}_InitialAgent"; strategy_func_name = sanitize_for_python_identifier(base_name)
            print(f"  Attempting to generate function named: {strategy_func_name}")

            # Step 1: ask the model for a textual strategy and a suggested
            # fallback move.
            textual_strategy, llm_fallback = strategy_generator.generate_textual_strategy(
                player_id_display=model_label, strategy_name=strategy_func_name,
                objective_prompt_segment=INITIAL_OBJECTIVE_PROMPT
            )
            if not textual_strategy: print(f"Error: Failed text strategy for {model_label}. Exiting."); sys.exit(1)
            
            # Save the textual strategy; a write failure is only a warning.
            textual_strat_filename = f"{strategy_func_name}_strategy.txt"
            textual_strat_filepath = current_run_strategies_dir / textual_strat_filename
            try:
                with open(textual_strat_filepath, "w", encoding="utf-8") as f:
                    f.write(f"# Textual Strategy for: {model_label} ({strategy_func_name})\n# LLM API: {model_api}, Model: {model_id}\n# LLM Suggested Fallback: {llm_fallback or 'Not Provided'}\n\n{textual_strategy}")
                print(f"  Saved textual strategy to: {textual_strat_filepath}")
            except Exception as e: print(f"  Warning: Could not save textual strategy for {strategy_func_name}: {e}")

            # Use the model's fallback only if it is a valid move; otherwise
            # use the system default, and say which case applied.
            final_fallback = llm_fallback if llm_fallback in [COOPERATE, DEFECT] else SYSTEM_DEFAULT_FALLBACK_MOVE
            if llm_fallback not in [COOPERATE, DEFECT] and llm_fallback is not None: print(f"  LLM fallback '{llm_fallback}' invalid, using system default '{SYSTEM_DEFAULT_FALLBACK_MOVE}'.")
            elif llm_fallback is None: print(f"  LLM did not provide fallback, using system default '{SYSTEM_DEFAULT_FALLBACK_MOVE}'.")

            # Step 2: turn the textual strategy into Python source.
            generated_code = strategy_generator.generate_strategy_code(
                 player_id_display=model_label, strategy_name=strategy_func_name,
                 objective_prompt_segment=INITIAL_OBJECTIVE_PROMPT,
                 textual_strategy_description=textual_strategy
            )
            if not generated_code: print(f"Error: Failed code generation for {model_label}. Exiting."); sys.exit(1)

            # Save the generated source; a write failure is only a warning.
            code_filename = f"{strategy_func_name}.py"
            code_filepath = current_run_strategies_dir / code_filename
            try:
                with open(code_filepath, "w", encoding="utf-8") as f:
                    f.write(f"# Generated Python Strategy for: {model_label} ({strategy_func_name})\n# LLM API: {model_api}, Model: {model_id}\n# Based on: {textual_strat_filename}\n# Effective Fallback: {final_fallback}\n\n{generated_code}")
                print(f"  Saved generated code to: {code_filepath}")
            except Exception as e: print(f"  Warning: Could not save code for {strategy_func_name}: {e}")

            # Step 3: compile the source; a falsy result aborts the run.
            compile_result = compile_strategy_from_string(strategy_func_name, generated_code, final_fallback)
            if not compile_result: print(f"Error: Failed compilation for {model_label}. Exiting."); sys.exit(1)
            
            actual_name, func_obj, effective_fallback = compile_result
            initial_agent_info[model_label] = {
                'label': model_label, 'func': func_obj, 'fallback': effective_fallback,
                'original_name': actual_name, 'code': generated_code
            }
            print(f"Successfully generated and compiled strategy for {model_label}.")
        except Exception as e:
            # Any unexpected failure for one model aborts the whole run.
            print(f"Critical Error for {model_label} ({model_api}:{model_id}): {e}\n{traceback.format_exc()}"); sys.exit(1)

    print("\n--- Calculating Initial Payoff Matrix ---")
    # payoff_matrix[i, j] is the average per-round score of agent i against
    # agent j, with rows and columns in MODEL_LABELS order.
    payoff_matrix = np.zeros((3, 3))
    # NOTE(review): this lookup assumes every entry of MODEL_LABELS matches a
    # config 'label'; a mismatch would raise KeyError here.
    initial_agents_ordered = [initial_agent_info[label] for label in MODEL_LABELS]
    for i in range(3):
        for j in range(3):
            agent_i, agent_j = initial_agents_ordered[i], initial_agents_ordered[j]
            print(f"  Simulating {agent_i['label']} vs {agent_j['label']} ({ROUNDS_FOR_PAYOFF_MATRIX} rounds)...")
            # Each ordered pair is played num_payoff_games times and averaged.
            num_payoff_games = 3; total_score_i = 0
            for _ in range(num_payoff_games):
                # Only the first element of the returned pair is used, as the
                # row player's score (presumably the second is the opponent's;
                # verify against run_ipd_game).
                score_i, _ = run_ipd_game(agent_i, agent_j, ROUNDS_FOR_PAYOFF_MATRIX)
                total_score_i += score_i
            # Average per game, then per round; guard against zero rounds.
            avg_payoff_i = (total_score_i / num_payoff_games) / ROUNDS_FOR_PAYOFF_MATRIX if ROUNDS_FOR_PAYOFF_MATRIX > 0 else 0
            payoff_matrix[i, j] = avg_payoff_i
    print("\nInitial Payoff Matrix (A[i,j] = avg payoff/round for row i vs col j):")
    header = " " * 12 + " ".join([f"{label:<12}" for label in MODEL_LABELS]); print(header); print("-" * len(header))
    for i, row in enumerate(payoff_matrix): print(f"{MODEL_LABELS[i]:<12} {' '.join([f'{p:<12.3f}' for p in row])}")

    # Save the matrix as CSV; comments="" keeps numpy from prefixing the
    # header row with '# '.
    payoff_matrix_filename = current_run_output_dir / "payoff_matrix.csv"
    try:
        np.savetxt(payoff_matrix_filename, payoff_matrix, delimiter=",", header=",".join(MODEL_LABELS), comments="")
        print(f"Saved Payoff Matrix to: {payoff_matrix_filename}")
    except Exception as e: print(f"Error saving payoff matrix: {e}")

    # Split the population as evenly as possible across the three types; when
    # POPULATION_SIZE is not divisible by 3, the first types get one extra.
    initial_population_list = []
    agents_per_type, remainder = POPULATION_SIZE // 3, POPULATION_SIZE % 3
    for i, label in enumerate(MODEL_LABELS):
        count = agents_per_type + (1 if i < remainder else 0)
        print(f"  Initializing {count} agents of type: {label}")
        # Shallow copy: each agent gets its own dict, while the values
        # (including the compiled function) are shared.
        for _ in range(count): initial_population_list.append(initial_agent_info[label].copy())
    random.shuffle(initial_population_list)
    print(f"\nInitial Population Composition ({len(initial_population_list)} agents): {Counter(agent['label'] for agent in initial_population_list)}")

    population_history = run_moran_process(
        initial_population_list, MODEL_LABELS, NUM_MORAN_STEPS, POPULATION_SIZE, ROUNDS_PER_IPD_GAME
    )

    if population_history:
        # Save the history as CSV; the row index is written as 'moran_step'.
        pop_history_df = pd.DataFrame(population_history)
        pop_history_filename = current_run_output_dir / "moran_population_history.csv"
        try:
            pop_history_df.to_csv(pop_history_filename, index_label="moran_step")
            print(f"Saved Moran Process Population History to: {pop_history_filename}")
        except Exception as e: print(f"Error saving population history: {e}")

        # OUTPUT_VIS_DIR (not the per-run folder) is passed because the plot
        # function appends its own '<labels>_vs_...' subfolder, built the same
        # way as tournament_run_name.
        # NOTE(review): plot_trajectory is not passed, so it keeps its default
        # of False and the Moran trajectory is not drawn; confirm this is
        # intended.
        plot_dynamics_and_trajectory_simplex(
            payoff_matrix, population_history, MODEL_LABELS, MODEL_LABELS,
            OUTPUT_VIS_DIR
        )
    else:
        print("No population history.")

    print("\n--- Evolutionary Tournament Script Finished ---")
#%%